#################################################################################
# Replication file for:                                                         #
# "Balancing Precision and Retention in Experimental Design"                    #
#                                                                               #
# Gustavo Diaz                                                                  #
# Northwestern University                                                       #
# gustavo.diaz@northwestern.edu                                                 #
#                                                                               #
# Erin L. Rossiter                                                              #
# University of Notre Dame                                                      #
# erossite@nd.edu                                                               #
#                                                                               #
# This file analyzes the handcoding of published experiments to produce the     #
# statistic reported in Section 1 and the statistics and table in Appendix D.   #
#################################################################################

# Data -----
# Load the handcoding data; the path is relative to the working directory.
df <- read.csv(file = "./data/raw_data/handcoding.csv")

# Appendix D.1 text -----
# Each expression below is evaluated at top level for its printed value.

# Articles overall: number of distinct `doi` values in the full data
nrow(distinct(df, doi))

# Articles that were handcoded: distinct `doi` among rows with in_sample == 1
nrow(distinct(filter(df, in_sample == 1), doi))

# Experiments that were handcoded: distinct (doi, experiment_id) pairs
# among rows with in_sample == 1
nrow(distinct(filter(df, in_sample == 1), doi, experiment_id))

# Share that collected pre-treatment covariates, restricted to handcoded
# experiments that used neither blocking nor a prepost design.
# Computed as the mean of `pretreatment_covars` (presumably a 0/1 indicator;
# confirm against the codebook).
df %>%
  filter(in_sample == 1, block == 0, prepost == 0) %>%
  summarise(collect_pt = mean(pretreatment_covars))

# Appendix Table D1 -----
# Build one row per design type (block x prepost) among handcoded experiments
# (in_sample == 1). Each row carries:
#   d              - prevalence as "pct\% (n)", pct rounded to a whole percent
#   IQR_samplesize - "median (Q1, Q3)" of sample_size
#   IQR_arms       - "median (Q1, Q3)" of n_arms
#   lab            - human-readable design label
# block and prepost are kept here; they are dropped in the next step.
design_stats <- df %>%
  filter(in_sample == 1) %>%
  group_by(block, prepost) %>%
  summarise(n = n(),
            med_samplesize = median(sample_size, na.rm = TRUE),
            quant25_samplesize = quantile(sample_size, .25, na.rm = TRUE),
            quant75_samplesize = quantile(sample_size, .75, na.rm = TRUE),
            IQR_samplesize = paste0(med_samplesize, " (", quant25_samplesize, ", ", quant75_samplesize, ")"),
            med_arms = median(n_arms, na.rm = TRUE),
            # na.rm = TRUE matches the median above and the sample-size
            # quantiles; without it quantile() errors on any missing n_arms.
            quant25_arms = quantile(n_arms, .25, na.rm = TRUE),
            quant75_arms = quantile(n_arms, .75, na.rm = TRUE),
            IQR_arms = paste0(med_arms, " (", quant25_arms, ", ", quant75_arms, ")"),
            # Return an ungrouped result so sum(n) below is over all designs.
            .groups = "drop") %>%
  # The numeric pieces were only needed to build the formatted strings.
  select(-med_samplesize, -quant25_samplesize, -quant75_samplesize,
         -med_arms, -quant25_arms, -quant75_arms) %>%
  mutate(pct = round((n / sum(n)) * 100)) %>%
  # "\\%" is a literal backslash-percent so the cell is valid LaTeX.
  mutate(d = paste0(pct, "\\% (", n, ")"), .after = n) %>%
  select(-n, -pct) %>%
  mutate(lab = case_when(
    block == 0 & prepost == 0 ~ "Neither",
    block == 1 & prepost == 0 ~ "Only block randomization",
    block == 0 & prepost == 1 ~ "Only prepost",
    block == 1 & prepost == 1 ~ "Both block randomization and prepost"
  ), .before = block)

# Keep only the display columns: label and prevalence first, then every
# remaining summary column; the block/prepost indicators are removed.
design_stats <- select(design_stats, lab, d, everything(), -block, -prepost)

# Render design_stats as a LaTeX table and write it to ./tables/tableD1.txt.
# escape = FALSE leaves cell contents unescaped so LaTeX already present in
# the data (and the \label in the caption) is passed through as-is.
# NOTE(review): the footnote hard-codes "217 experiments"; confirm it still
# matches the number of handcoded experiments in the data.
tableD1 <- design_stats %>%
  kbl(format = "latex",
      booktabs = TRUE,
      escape = FALSE,
      align = "l",
      col.names = c("", "Prevalence", "Median (IQR) Sample Size", "Median (IQR) Arms"),
      caption = "Current Use of Alternative Designs to Increase Precision\\label{tab:handcoding}") %>%
  kable_styling(font_size = 10, full_width = FALSE) %>%
  footnote(general = "The second column shows the percentage of designs used in a sample
           of 217 experiments from articles published in 2022-2023, with the number of
           experiments in parentheses. The third and fourth columns show the median sample
           size and number of experimental arms per type of design, with the interquartile range
           in parentheses.",
           threeparttable = TRUE, footnote_as_chunk = TRUE)

# Save the LaTeX source of the table.
writeLines(text = tableD1, con = "./tables/tableD1.txt")